# -*- coding: utf-8 -*-
import scrapy
from itcast.items import ItcastItem

class DemoSpider(scrapy.Spider):
    """Spider that visits the start page and emits one item per ``<div>``.

    Each item currently carries a single ``city`` field taken from the
    first text node found under the ``<a>`` children of the ``<div>``.
    """

    # Name used to launch this spider.
    name = 'tenxun'
    # Domain scope the crawl is limited to.
    allowed_domains = ['tencent.com']
    # URL(s) the crawl starts from.
    start_urls = ['https://hr.tencent.com/position.php?keywords=%E8%AF%B7%E8%BE%93%E5%85%A5%E5%85%B3%E9%94%AE%E8%AF%8D&lid=0&tid=0']

    def parse(self, response):
        """Yield an ``ItcastItem`` for every ``<div>`` element in *response*.

        ``city`` is the first text node under the div's direct ``<a>``
        children, or ``None`` when there is no such text. Each item is also
        printed to stdout after a separator line.
        """
        separator = '*' * 50

        for node in response.xpath('//div'):
            # Use the item class rather than a plain dict; presumably it
            # restricts the allowed field names so typos are caught.
            item = ItcastItem()

            # Place name: first text node beneath the div's <a> children.
            link_texts = node.xpath('./a//text()')
            item['city'] = link_texts.extract_first()

            # NOTE: extraction of the 'name', 'position' and 'info' fields
            # was previously sketched here but is currently disabled.

            print(separator)
            print(item)

            # Must be ``yield`` (not ``return``): returning here would stop
            # after the first div instead of emitting one item per div.
            yield item






